package per.hnvcam.searchext.parser.impl;

import org.apache.tika.parser.html.DefaultHtmlMapper;
import org.apache.tika.parser.html.HtmlMapper;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * A {@link DefaultHtmlMapper} that supplies its own fixed set of HTML element
 * names to be reported as discardable.
 *
 * <p>The discardable names are {@code STYLE}, {@code SCRIPT}, {@code IMG},
 * {@code EMBED}, {@code AREA} and {@code INPUT}. Only
 * {@link #isDiscardElement(String)} is overridden; every other mapping method
 * is inherited from {@link DefaultHtmlMapper}.
 *
 * <p>Created 3/25/12, 5:23 PM.
 *
 * @author camhoang
 */
public class HtmlTagMapperImpl extends DefaultHtmlMapper {

   /**
    * Element names that {@link #isDiscardElement(String)} reports as discardable.
    *
    * <p>Built from a plain list instead of double-brace initialization, which
    * would define an extra anonymous {@code HashSet} subclass just to populate
    * the set. The set is wrapped as unmodifiable because it is a shared constant
    * that is never meant to change after class initialization.
    */
   private static final Set<String> DISCARDABLE_ELEMENTS =
         Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
               "STYLE",
               "SCRIPT",
               "IMG",
               "EMBED",
               "AREA",
               "INPUT")));

   /**
    * Tells whether the element with the given name should be discarded.
    *
    * <p>The check is an exact, case-sensitive set lookup and all entries are
    * upper case, so a lower- or mixed-case name is never matched.
    * NOTE(review): this presumes callers pass upper-case element names; confirm
    * against the Tika {@code HtmlMapper} contract. The superclass implementation
    * is not consulted.
    *
    * @param name the element name to test; {@code null} yields {@code false}
    * @return {@code true} if {@code name} is one of the discardable element
    *         names, {@code false} otherwise
    */
   @Override
   public boolean isDiscardElement(String name) {
      return DISCARDABLE_ELEMENTS.contains(name);
   }
}
